# -*- coding: utf-8 -*-
import scrapy
import pymysql.cursors
from jianshu.items import JianShuArticleDetail


class JianshuDataSpider(scrapy.Spider):
    """Fetch the HTML body of Jianshu articles that have no stored detail yet.

    On start the spider reads every ``article_token`` from the ``Article``
    table whose ``article_detail`` column is NULL, requests the matching
    ``https://www.jianshu.com/p/<token>`` page and yields one
    ``JianShuArticleDetail`` item per response.
    """

    # Let 404 responses reach ``parse`` instead of being filtered out by
    # Scrapy's HttpError middleware; such pages produce an item whose
    # ``article_detail`` is the empty string.
    handle_httpstatus_list = [404]

    # MySQL connection settings.
    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from Scrapy settings or the environment instead.
    url = '118.89.193.59'
    port = 3306
    database_username = 'root'
    database_password = '123123anquan'
    database_name = 'JianShu'
    user_collection = 'User'
    article_collection = 'Article'

    name = 'jianshu_data'
    allowed_domains = ['www.jianshu.com']
    start_urls = ['http://www.jianshu.com/']

    def start_requests(self):
        """Yield one request per article that still lacks its detail.

        The pending rows are loaded (and the database connection closed)
        before the first request is yielded, so the connection is never held
        open while Scrapy lazily consumes this generator.
        """
        rows = self._fetch_pending_articles()
        self.dataList = rows
        for row in rows:
            token = row["article_token"]
            if not token:
                # A NULL/empty token cannot form a valid URL; skip the row
                # rather than abort every remaining request.
                continue
            yield scrapy.Request(
                'https://www.jianshu.com/p/' + token,
                self.parse,
                # Carry the token along so ``parse`` does not depend on the
                # final (possibly redirected) response URL.
                meta={'article_token': token},
            )

    def _fetch_pending_articles(self):
        """Return the rows of ``Article`` whose ``article_detail`` is NULL.

        Returns:
            A sequence of dicts, each holding an ``article_token`` key. An
            empty list is returned when the database cannot be queried; the
            error is logged rather than raised so the crawl ends cleanly.
        """
        connection = None
        try:
            connection = pymysql.connect(host=self.url,
                                         port=self.port,
                                         user=self.database_username,
                                         password=self.database_password,
                                         db=self.database_name,
                                         charset='utf8mb4',
                                         cursorclass=pymysql.cursors.DictCursor)
            # The cursor context manager closes the cursor on exit.
            with connection.cursor() as cursor:
                sql = 'SELECT article_token FROM Article WHERE article_detail IS NULL'
                cursor.execute(sql)
                return cursor.fetchall()
        except pymysql.MySQLError:
            self.logger.exception('失败哦')
            return []
        finally:
            # Always release the connection, including on failure.
            if connection is not None:
                connection.close()

    @staticmethod
    def _token_from_url(url):
        """Extract the last path segment of *url*, ignoring query/fragment."""
        path = url.split('#', 1)[0].split('?', 1)[0]
        return path.rstrip('/').split('/')[-1]

    def parse(self, response):
        """Build a ``JianShuArticleDetail`` item from an article page.

        The detail is the serialized ``div.show-content-free`` element(s)
        joined by a single space; it is an empty string when the page has no
        such element.
        """
        fragments = response.xpath('//div[@class="show-content-free"]').extract()
        detail_html = " ".join(fragments)
        self.logger.debug('文章详情')

        # Prefer the token attached by ``start_requests``; fall back to the
        # URL for responses produced by requests built elsewhere.
        article_token = (response.meta.get('article_token')
                         or self._token_from_url(response.url))
        self.logger.debug('%s', detail_html)

        detail_item = JianShuArticleDetail()
        detail_item["article_token"] = article_token
        detail_item["article_detail"] = detail_html
        yield detail_item
